# ---
# title: "Negotiating with your mouth full (ingestion and processing code)"
# in support of paper: "Negotiating with your mouth full: Intergovernmental negotiations between transparency and confidentiality"
# paper authors: "Mareike Kleine and Samuel Huntington"
# code point of contact: "Samuel Huntington"
# date: "16 September, 2024"
# ---

# libraries
library(janitor)
library(tidyr)
library(pscl)
library(dplyr)
library(ggplot2)
library(readxl)
library(lubridate)
library(readr)
library(tseries)
library(haven)
library(writexl)


# ingest session-level data
sessi_data <- read_xlsx("./dataset_core.xlsx", col_names = TRUE, sheet = 1)

# set up variables for date, semester, de facto transparency, agenda items, and sessions-with-meals (binary)
sessi_data <- mutate(sessi_data,Date=as.Date(Date))
sessi_data$Semester <- as.numeric(substr(sessi_data$Semester,1,4))+as.numeric(substr(sessi_data$Semester,6,6))/2-0.5
sessi_data <- mutate(sessi_data,ln_n_members=log(N_Members),.after=N_Members)
sessi_data <- mutate(sessi_data,ln_agenda = log(Agenda_items),.after=Agenda_items)
sessi_data <- mutate(sessi_data,swm=Meals>0,.after=Meals)

# ingest semester-level data
seme_data <- read_xlsx("./dataset_core.xlsx", col_names = TRUE, sheet = 2)

# set up semester and de facto variables
seme_data <- mutate(seme_data,sem = as.numeric(substr(Semester,1,4)) + as.numeric(substr(Semester,6,6))/2 - .5, .before = Semester)
seme_data <- mutate(seme_data,ln_n_members=log(N_Members),.after=N_Members)
seme_data <- mutate(seme_data,ln_agenda = log(Agenda_items),.after=Agenda_items)


# ingest and set up cpi independent variable
# pre-2012 load
cpi_early <- data.frame()
for(x in 2011:1995) {
  cpi_early <- read_csv(paste0("https://images.transparencycdn.org/images/CPI-Archive-",x,".csv")) %>%
    mutate(year=x) %>%
    filter(!year %in% c(2004,2006,2007,2008,2011)) %>% # will load these years manually due to inconsistent formatting
    bind_rows(cpi_early)
}
# add inconsistently formatted years
cpi_early <- cpi_early %>%
  bind_rows(mutate(read.csv("https://images.transparencycdn.org/images/CPI-2004_200602_110140.csv"),year=2004),
            mutate(read.csv("https://images.transparencycdn.org/images/CPI-2006-new_200602_095933.csv"),year=2006),
            mutate(read.csv("https://images.transparencycdn.org/images/CPI-2007-new_200602_092501.csv"),year=2007),
            mutate(read.csv("https://images.transparencycdn.org/images/CPI-Archive-2008-2.csv"),year=2008),
            mutate(read.csv("https://images.transparencycdn.org/images/CPI-2011-new_200601_104308.csv"),year=2011,interval=as.character(interval)))
# filter non-eu countries out
cpi_early <- cpi_early %>%
  filter(region=="WE/EU" & !iso %in% c("NOR","ISL","CHE","HRV")) %>%
  filter(iso!="ROU" & iso!="BGR" | year>2006) %>%
  filter(!iso %in% c("SVN","MLT","CYP","EST","LVA","LTU","POL","HUN","CZE","SVK") | year>2003) %>%
  mutate(score=score*10)
# standardise czechia name
cpi_early[grep("Cz",cpi_early$country),"country"] <- "Czechia"
# post-2011 load
# file downloaded from https://images.transparencycdn.org/images/CPI_FULL_DATA_2021-01-27-162209.zip
cpi_late <- read_xlsx("./CPI2020_GlobalTablesTS_210125.xlsx", col_names = TRUE, sheet = 2)
colnames(cpi_late) <- as.character(cpi_late[2,])
cpi_late <- cpi_late[-1,]
cpi_late <- cpi_late[-1,]
# filter non-eu countries out
cpi_late <- cpi_late %>%
  clean_names() %>%
  filter(region=="WE/EU" & !iso3 %in% c("NOR","ISL","CHE")) %>%
  mutate(across(colnames(.)[-c(1:3)],as.numeric))
# bind them
cpi_combined <- cpi_early
for(y in 2012:2020) {
  for(c in 1:nrow(cpi_late)) {
    cpi_combined <- bind_rows(cpi_combined,data.frame(country=cpi_late[c,"country"],iso=as.character(cpi_late[c,"iso3"]),
                                              score=as.numeric(cpi_late[c,paste0("cpi_score_",y)]),year=y))
  }
}
# filter pre-eu croatia out
cpi_combined <- cpi_combined[cpi_combined$iso!="HRV" | cpi_combined$year>2012,]
# country averages
cpi_combined %>%
  group_by(iso) %>%
  summarise(cpi=round(mean(score))) %>%
  arrange(-cpi) %>%
  mutate(iso=factor(iso,levels=iso)) %>%
  ggplot(aes(iso,cpi,fill=iso)) +
  geom_col() +
  geom_text(aes(label=cpi),vjust = -0.5) +
  theme(legend.position = "none", axis.text.x = element_text(angle = 90, vjust = 0.5, hjust=1))
# yearly averages
cpi_summary <- cpi_combined %>%
  group_by(year) %>%
  summarise(cpi_mean=mean(score),cpi_median=median(score))
# member state plot
ggplot(cpi_combined,aes(year,score)) +
  geom_line(aes(colour=country)) +
  geom_text(aes(label=country),data=filter(cpi_combined,year==max(year)),size=3,nudge_x=0.5) +
  geom_line(data=cpi_summary,aes(y=cpi_mean),size=1,colour="red") +
  geom_label(data=filter(cpi_summary,year==max(year)),aes(y=cpi_mean),label="Mean CPI",size=3) +
  geom_line(data=cpi_summary,aes(y=cpi_median),size=1,colour="blue") +
  geom_label(data=filter(cpi_summary,year==max(year)),aes(y=cpi_median),label="Median CPI",size=3)
# fill pre-1995 in
for(year in 1994:1990) {
  cpi_summary <- bind_rows(data.frame(year,
                                  cpi_mean=mean(cpi_combined$score[cpi_combined$year==1995 & !cpi_combined$iso %in% c("FIN","SWE","AUT")]),
                                  cpi_median=median(cpi_combined$score[cpi_combined$year==1995 & !cpi_combined$iso %in% c("FIN","SWE","AUT")]))
                       ,cpi_summary)
}
# join to main data-sets
seme_data <- inner_join(cpi_summary,seme_data %>%
                         rename(year=Year) %>%
                         mutate(year=as.numeric(year)))
sessi_data <- inner_join(cpi_summary,rename(sessi_data,year=Year),by="year")


# ingest and set up manifestos data containing election results
# tidy the data-set
natl_manifest <- read_csv("./MPDataset_MPDS2021a.csv") %>%
  filter(eumember==10 & date > 197801) %>%
  select(edate,countryname,partyabbrev,partyname,per108,per110,pervote,presvote,rile) %>%
  mutate(edate = as.Date(edate,"%d/%m/%Y")) %>%
  group_by(edate) %>%
  slice_max(pervote, n=2) %>%
  mutate(margin = max(pervote)-min(pervote)) %>%
  slice_max(pervote) %>%
  ungroup() %>%
  mutate(clos = (margin<5)) %>%
  mutate(net_eu_view=per108-per110, .after = per110) %>%
  mutate(skep = net_eu_view<0, .after = net_eu_view) %>%
  arrange(edate)
# join to main data-set along with election counts
sessi_data <- sessi_data %>%
  rowwise() %>%
  mutate(recent = sum(natl_manifest$edate < Date & natl_manifest$edate > add_with_rollback(Date,months(-2))), .before=Agenda_items) %>%
  mutate(recent_close = sum(natl_manifest$edate < Date & natl_manifest$edate > add_with_rollback(Date,months(-2)) &
                              natl_manifest$clos), .before=Agenda_items) %>%
  mutate(recent_big = sum(natl_manifest$edate < Date & natl_manifest$edate > add_with_rollback(Date,months(-2)) &
                            natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom")), .before=Agenda_items) %>%
  mutate(recent_big_close = sum(natl_manifest$edate < Date & natl_manifest$edate > add_with_rollback(Date,months(-2)) &
                                  natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom") &
                                  natl_manifest$clos), .before=Agenda_items) %>%
  mutate(upcoming = sum(natl_manifest$edate> Date & natl_manifest$edate < add_with_rollback(Date,months(2))), .before=Agenda_items) %>%
  mutate(upcoming_close=sum(natl_manifest$edate>Date & natl_manifest$edate<add_with_rollback(Date,months(2)) & natl_manifest$clos), .before=Agenda_items) %>%
  mutate(upcoming_big = sum(natl_manifest$edate>Date & natl_manifest$edate < add_with_rollback(Date,months(2)) &
                              natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom")), .before=Agenda_items) %>%
  mutate(upcoming_big_close = sum(natl_manifest$edate>Date & natl_manifest$edate<add_with_rollback(Date,months(2)) & natl_manifest$clos &
                                    natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom")), .before=Agenda_items) %>%
  mutate(nearby = sum(natl_manifest$edate>add_with_rollback(Date,months(-2)) & natl_manifest$edate < add_with_rollback(Date,months(2))), .before=Agenda_items) %>%
  mutate(nearby_close=sum(natl_manifest$edate>add_with_rollback(Date,months(-2)) & natl_manifest$edate<add_with_rollback(Date,months(2)) & natl_manifest$clos), .before=Agenda_items) %>%
  mutate(nearby_big = sum(natl_manifest$edate>add_with_rollback(Date,months(-2)) & natl_manifest$edate < add_with_rollback(Date,months(2)) &
                            natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom")), .before=Agenda_items) %>%
  mutate(nearby_big_close = sum(natl_manifest$edate>add_with_rollback(Date,months(-2)) & natl_manifest$edate<add_with_rollback(Date,months(2)) & natl_manifest$clos &
                                  natl_manifest$countryname %in% c("Germany","France","Italy","United Kingdom")), .before=Agenda_items) %>% 
  mutate(nearby_skep=sum(natl_manifest$edate>add_with_rollback(Date,months(-2)) & natl_manifest$edate<add_with_rollback(Date,months(2)) & natl_manifest$skep), .before=Agenda_items)


# ingest and set up euroscepticism data
mannheim <- read_dta("./ZA3521_v2-0-1.dta")
# pre-2003 data
mannheim_summary <- mannheim %>%
  filter(year>1989 & nation2!=15 & nation2!=19) %>%
  filter(year>1994 | nation2<15) %>%
  count(year,nation2,benefit) %>%
  pivot_wider(names_from = benefit, values_from = n) %>%
  clean_names() %>%
  mutate(euroscep=x1<x2) %>% #View()
  group_by(year) %>%
  summarise(perc_detriment = sum(euroscep)/n())
mannheim_summary <- mannheim %>%
  filter(year>1989 & nation2!=15 & nation2!=19) %>%
  filter(year>1994 | nation2<15) %>%
  filter(!is.na(benefit)) %>%
  count(year,benefit) %>%
  pivot_wider(names_from = benefit, values_from = n) %>%
  clean_names() %>%
  rowwise() %>%
  transmute(year,net_detriment=(x2-x1)/sum(x1,x2,x8,na.rm = TRUE)) %>%
  merge(mannheim_summary)

# post-2002 data from from multiple eurobarometer files
barom_benefit <- data.frame(matrix(ncol=7,nrow=0))
colnames(barom_benefit) <- c('year','country','benefitted','not_benefitted','refusal','dk','total')
for(y in 2000:2020) {
  for(s in 1:2) {
    p = list.files(path="./eurobarometer/", pattern=paste0(y,"-",s,".*benefit.xls"))
    if (length(p) > 0) {
      x = read_excel(paste0("./eurobarometer/",p), col_names = FALSE, sheet = 1)
      if (ncol(x)==4) {
        x = cbind(x,rep(0,count(x)))
        x = x[,c(1,2,3,5,4)]
      }
      if (ncol(x)==5) {
        x = cbind(x,rep(0,count(x)))
        x = x[,c(1,2,3,4,6,5)]
      }
      x = cbind(rep(y+s/2-0.5,count(x)),x)
      colnames(x) = c('year','country','benefitted','not_benefitted','refusal','dk','total')
      z = which(endsWith(x[,2],"France"))
      for (a in 2:z) {x = x[-1,]}
      x = x[-unlist(count(x)),]
      x = mutate(x,across(colnames(x)[-2],as.numeric))
      if (length(which(endsWith(x[,2],"East")))>0) {
        x = rbind(x,c(y+s/2-0.5,"Germany",
                      x[which(endsWith(x[,2],"East")),3]+x[which(endsWith(x[,2],"West")),3],
                      x[which(endsWith(x[,2],"East")),4]+x[which(endsWith(x[,2],"West")),4],
                      x[which(endsWith(x[,2],"East")),5]+x[which(endsWith(x[,2],"West")),5],
                      x[which(endsWith(x[,2],"East")),6]+x[which(endsWith(x[,2],"West")),6],
                      x[which(endsWith(x[,2],"East")),7]+x[which(endsWith(x[,2],"West")),7]))
        x = x[-which(endsWith(x[,2],"East")),]
        x = x[-which(endsWith(x[,2],"West")),]
      }
      x = mutate(x,across(colnames(x)[-2],as.numeric))
      if (length(which(endsWith(x[,2],"Britain")))>0) {
        x = rbind(x,c(y+s/2-0.5,"United Kingdom",
                      x[which(endsWith(x[,2],"n Ireland")),3]+x[which(endsWith(x[,2],"Britain")),3],
                      x[which(endsWith(x[,2],"n Ireland")),4]+x[which(endsWith(x[,2],"Britain")),4],
                      x[which(endsWith(x[,2],"n Ireland")),5]+x[which(endsWith(x[,2],"Britain")),5],
                      x[which(endsWith(x[,2],"n Ireland")),6]+x[which(endsWith(x[,2],"Britain")),6],
                      x[which(endsWith(x[,2],"n Ireland")),7]+x[which(endsWith(x[,2],"Britain")),7]))
        x = x[-which(endsWith(x[,2],"n Ireland")),]
        x = x[-which(endsWith(x[,2],"Britain")),]
      }
      x[grep("Kingdom",x$country),"country"] <- "United Kingdom"
      x[grep("Germany",x$country),"country"] <- "Germany"
      x[grep("Spain",x$country),"country"] <- "Spain"
      x[grep(" - ",x$country),"country"] <- substring(x[grep(" - ",x$country),"country"]
                                                      ,6,nchar(x[grep(" - ",x$country),"country"]))
      barom_benefit <- rbind(barom_benefit,x)
    }
  }
}

barom_benefit <- mutate(barom_benefit,across(colnames(barom_benefit)[-2],as.numeric))
barom_benefit <- filter(barom_benefit,benefitted>0)

#  heatmap
barom_benefit %>%
  mutate(net = (benefitted-not_benefitted)/total) %>%
  filter(net<0) %>%
  select(1,2,8) %>%
  pivot_wider(names_from = country, values_from = net) %>%
  View()
# comparisons
View(mutate(barom_benefit,net = benefitted<not_benefitted))

# group to year and calculate percentages
barom_benefit_sum <- barom_benefit %>%
  mutate(euroscep = (benefitted<not_benefitted)) %>%
  group_by(year) %>%
  summarise(perc_detriment = sum(euroscep)/n())
barom_benefit_sum <- barom_benefit %>%
  group_by(year) %>%
  summarise(net_detriment=(sum(not_benefitted)-sum(benefitted))/sum(total)) %>%
  merge(barom_benefit_sum)

# merge with session-level data
sessi_data <- mannheim_summary %>%
  rbind(barom_benefit_sum) %>%
  rename(Semester=year) %>%
  merge(sessi_data,all.y=TRUE) %>%
  arrange(Date)
sessi_data <- fill(sessi_data,net_detriment)
sessi_data <- fill(sessi_data,perc_detriment)

# merge with semester-level data
seme_data <- mannheim_summary %>%
  rbind(barom_benefit_sum) %>%
  rename(sem=year) %>%
  merge(seme_data,all.y=TRUE) %>%
  arrange(sem)
seme_data <- fill(seme_data,net_detriment)
seme_data <- fill(seme_data,perc_detriment)


# ingest and set up quality-of-government independent variable for appendix section 6
qog_initial <- read_xlsx("./final-transparency-indices-scores-september-2014.xlsx", col_names = TRUE, sheet = 1) %>%
  clean_names() %>%
  rename(country=x1,year=x2) %>%
  slice(-1) %>%
  mutate(across(colnames(.)[c(2,3,5,7)],as.numeric))
qog_sum <- qog_initial %>%
  filter((country %in% c("Bulgaria","Romania") & year>2006) |
           (country %in% c("Slovenia","Cyprus","Malta","Estonia","Latvia","Lithuania",
                           "Hungary","Poland","Czech Republic","Slovak Republic") & year>2003) |
           (country %in% c("Austria","Finland","Sweden") & year>1994) |
           (country %in% c("France","Germany","Italy","Belgium","Netherlands","Luxembourg",
                           "Denmark","Ireland","United Kingdom","Greece","Spain","Portugal") & year>1989)) %>%
  group_by(year) %>%
  summarise(qog_info=median(information_transparency),
            qog_account=median(accountability_transparency),
            qog_trans=median(transparency_index))
# extrapolate first missing years
for (x in 2011:2012) {
  qog_sum <- add_row(qog_sum,year=x,
                     qog_info=tail(qog_sum$qog_info,1),
                     qog_account=tail(qog_sum$qog_account,1),
                     qog_trans=tail(qog_sum$qog_trans,1))
}
# extrapolate since croatian accession
for (x in 2013:2019) {
  qog_sum <- qog_sum %>%
    add_row(cbind(year=x,qog_initial %>%
                    filter(year==2010 & country %in% c("Croatia","Bulgaria","Romania",
                                                       "Slovenia","Cyprus","Malta","Estonia","Latvia","Lithuania",
                                                       "Hungary","Poland","Czech Republic","Slovak Republic",
                                                       "Austria","Finland","Sweden",
                                                       "Denmark","Ireland","United Kingdom","Greece","Spain","Portugal",
                                                       "France","Germany","Italy","Belgium","Netherlands","Luxembourg")) %>%
                    summarise(qog_info=median(information_transparency),
                              qog_account=median(accountability_transparency),
                              qog_trans=median(transparency_index))))
}
# join to main dataset
sessi_data <- left_join(sessi_data,qog_sum,by="year")
seme_data <- left_join(seme_data,qog_sum,by="year")



# summary stats table
nrow(sessi_data)
sum(seme_data$Sessions_w_meals)
sum(sessi_data$Meals)

mean(seme_data$Sessions)
mean(seme_data$Sessions_w_meals)
mean(sessi_data$Meals)

median(seme_data$Sessions)
median(seme_data$Sessions_w_meals)
median(sessi_data$Meals)

min(seme_data$Sessions)
min(seme_data$Sessions_w_meals)
min(sessi_data$Meals)

max(seme_data$Sessions)
max(seme_data$Sessions_w_meals)
max(sessi_data$Meals)

sd(seme_data$Sessions)
sd(seme_data$Sessions_w_meals)
sd(sessi_data$Meals)

# model 1
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + Date, family=poisson, data=sessi_data))

# model 2
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + cpi_median + ep_power +
              North_Pres + Eurozone + ln_agenda + Date,family=poisson, data=sessi_data))

# model 3
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + upcoming_big_close + perc_detriment + Date,
            family=poisson, data=sessi_data))

# model 4
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + upcoming_big_close + perc_detriment + Date,
            family=poisson,data=sessi_data[
              !grepl("GAERC",sessi_data$Config)&
              !grepl("JHA",sessi_data$Config)&
              !grepl("EYCS",sessi_data$Config),]))

# model 5
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + upcoming_big_close + perc_detriment
            + North_Pres + Eurozone + ln_agenda + Date, family=poisson, data=sessi_data))

# appendix section 1 - formal transparency
# create a transparency variable including an increment in 2006
sessi_data <- mutate(sessi_data,trans_r2=Transparency_all,.after=Date)
sessi_data[sessi_data$Date>"2006-06-16","trans_r2"] <- sessi_data[sessi_data$Date>"2006-06-16","trans_r2"]+1
# test within model 1
summary(glm(formula=Meals ~ trans_r2 + ln_n_members + Date,family=poisson, data=sessi_data))
# test within model 2
summary(glm(formula=Meals ~ trans_r2 + ln_n_members + cpi_median + ep_power +
              North_Pres + Eurozone + ln_agenda + Date,family=poisson, data=sessi_data))
# test within model 3
summary(glm(formula=Meals ~ trans_r2 + ln_n_members + upcoming_big_close + perc_detriment + Date,
            family=poisson, data=sessi_data))
# create a binary transparency variable activating in 2006
sessi_data <- mutate(sessi_data,trans_r2_bin=0,.after=Date)
sessi_data[sessi_data$Date>"2006-06-16","trans_r2_bin"] <- 1
# test these results
summary(glm(formula=Meals ~ trans_r2_bin + Transparency_all + Date,family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ trans_r2_bin + ln_n_members + Date,family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + trans_r2_bin + Date,family=poisson, data=sessi_data))

# appendix section 2 - upcoming elections
summary(glm(formula=Meals ~ upcoming + Date,family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ upcoming_big + Date,family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ upcoming_close + Date,family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ upcoming_big_close + Date,family=poisson, data=sessi_data))

# appendix section 4 - council formations
# create binary variable for single-market-relevant configurations
sessi_data <- mutate(sessi_data,single_mkt = case_when(
  Config == "(GAERC/FAC)" ~ 0,
  Config == "(JHA)" ~ 0,
  Config == "(EYCS)" ~ 0,
  TRUE ~ 1),.after = Config)
# configurations ordered by mean frequency
sessi_data %>%
  group_by(Config,single_mkt) %>%
  summarise(meal_freq=mean(Meals),n()) %>%
  ungroup() %>%
  filter(meal_freq>0 & meal_freq<1) %>% # filters out non-standard, mostly one-off formations
  arrange(meal_freq)

# appendix section 5 - correlation pairs
sessi_data %>% transmute(
  "Formal transparency"=Transparency_all,
  "De facto transparency"=ln_n_members,
  "Normative (CPI) transparency"=cpi_median,
  "Upcoming big close elections"=upcoming_big_close,
  "Euroskepticism"=perc_detriment
) %>%
  pairs(
    upper.panel=panel.cor,
    lower.panel=function(x,y,col=par("col"),bg=NA,pch=par("pch"),cex=1,col.smooth="black",span=2/3,iter=3, ...)
    {panel.smooth(x,y,col,bg,pch,cex,col.smooth,...)})

# appendix section 6 - transparency culture
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + cpi_median + qog_trans + Date,family=poisson, data=sessi_data))

# appendix section 7 - northern council presidencies
summary(glm(formula=Meals~North_Pres+Date, family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ Transparency_all + ln_n_members +
              North_Pres + ln_agenda + Date,family=poisson, data=sessi_data))

# appendix section 9 - time series considerations
# adf tests
adf.test(sessi_data$Meals)
adf.test(seme_data$Meal_freq)
# plots
acf(seme_data$Swm_freq,lag.max = length(seme_data$Swm_freq),xlab = "lag #", ylab = 'ACF',main=' ')
pacf(seme_data$Swm_freq,lag.max = length(seme_data$Swm_freq),xlab = "lag #", ylab = 'ACF',main=' ')
# differenced variable
acf(diff(seme_data$Meal_freq),lag.max = length(diff(seme_data$Meal_freq)),xlab = "lag #", ylab = 'ACF',main=' ')
# lagged dependent variable
sessi_data <- c(0,(sessi_data$Meals)[-length(sessi_data$Meals)]) %>%
  cbind(sessi_data) %>%
  rename("meals_lag"=1) %>%
  relocate(meals_lag,.after=Meals)
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + meals_lag,family=poisson, data=sessi_data))

# appendix section 10 - european parliament power
summary(glm(formula=Meals ~ Transparency_all + ep_power + Date,family=poisson, data=sessi_data))

# appendix section 11.1 - sessions-with-meals logistic regression
# model 1l
summary(glm(formula=swm ~ Transparency_all + ln_n_members + Date, family=binomial, data=sessi_data))
# model 2l
summary(glm(formula=swm ~ Transparency_all + ln_n_members + cpi_median +
              ep_power + North_Pres + Eurozone + ln_agenda + Date,
            family=binomial, data=sessi_data))
# model 3l
summary(glm(formula=swm ~ Transparency_all + ln_n_members + upcoming_big_close + perc_detriment + Date,
            family=binomial, data=sessi_data))
# model 4l
summary(glm(formula=swm ~ Transparency_all + ln_n_members + upcoming_big_close + perc_detriment + Date,family=binomial,
            data=sessi_data[!grepl("GAERC",sessi_data$Config) & !grepl("JHA",sessi_data$Config)& !grepl("EYCS",sessi_data$Config),]))

# appendix section 11.2 - zero-inflated
summary(zeroinfl(formula=Meals ~ Transparency_all | ln_agenda, dist='poisson', data=sessi_data))

# appendix section 12 - semester granularity
# model 1s
summary(glm(formula=Meal_freq~Transparency_all + ln_n_members + Agenda_items + sem, family=gaussian, data=seme_data))
# model 2s
summary(glm(formula=Meal_freq ~ Transparency_all + ln_n_members + cpi_median + ep_power +
              North_Pres + Agenda_items + sem,family=gaussian, data=seme_data))

# appendix section 13 - over-dispersion tests
# figures obtained in the course of running models 1-5 above

# appendix section 14 - interrupted time series
# set up time series with interaction variables
sessi_br1 <- sessi_data %>%
  filter(Transparency_all<3) %>%
  mutate(Transparency_all=Transparency_all-1) %>%
  mutate(t=as.numeric(Date)-9422, .before=Date) %>%
  mutate(interact=t*Transparency_all, .before=Date) %>%
  mutate(Transparency_all=factor(Transparency_all))
sessi_br2 <- sessi_data %>%
  filter(Transparency_all<4 & Transparency_all>1) %>%
  mutate(Transparency_all=Transparency_all-2) %>%
  mutate(t=as.numeric(Date)-11473, .before=Date) %>%
  mutate(interact=t*Transparency_all, .before=Date) %>%
  mutate(Transparency_all=factor(Transparency_all))
sessi_br3 <- sessi_data %>%
  filter(Transparency_all>2) %>%
  mutate(Transparency_all=Transparency_all-3) %>%
  mutate(t=as.numeric(Date)-14578, .before=Date) %>%
  mutate(interact=t*Transparency_all, .before=Date) %>%
  mutate(Transparency_all=factor(Transparency_all))
# test time series with interaction variables
summary(glm(formula=Meals ~ Transparency_all + log_agenda + interact, family=poisson, data=sessi_br1))
summary(glm(formula=Meals ~ Transparency_all + log_agenda + interact, family=poisson, data=sessi_br2))
summary(glm(formula=Meals ~ Transparency_all + log_agenda + interact, family=poisson, data=sessi_br3))

# appendix section 15 - single transparency variable
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + Date, family=poisson, data=sessi_data))
summary(glm(formula=Meals ~ Transparency_all + Date,family=poisson, data=sessi_data))

# appendix section 16 - euro area crisis
# original crisis variable specification
sessi_data$Eurozone <- 0
sessi_data[sessi_data$Date>"2010-05-01" & sessi_data$Date<"2012-08-01","Eurozone"] <- 1
# expanded crisis variable specification
sessi_data$Eurozone <- 0
sessi_data[sessi_data$Date>"2008-09-15" & sessi_data$Date<"2013-07-01","Eurozone"] <- 1
# test within model 2
summary(glm(formula=Meals ~ Transparency_all + ln_n_members + cpi_median + ep_power +
              North_Pres + Eurozone + ln_agenda + Date,family=poisson, data=sessi_data))
# reset to original crisis variable specification
sessi_data$Eurozone <- 0
sessi_data[sessi_data$Date>"2010-05-01" & sessi_data$Date<"2012-08-01","Eurozone"] <- 1
# check
View(filter(sessi_data,Eurozone==1))


# figure 1
seme_data %>%
  mutate("Meals per session (% terms)"=Meal_freq*100) %>%
  rename("Sessions per semester"=Sessions) %>%
  pivot_longer(c("Sessions per semester","Meals per session (% terms)"),names_to = "Legend", values_to = "values") %>% #levels(.$Legend)
  ggplot(aes(sem,values,linetype=Legend)) +
  scale_linetype_discrete(breaks=c("Sessions per semester","Meals per session (% terms)")) +
  geom_line() +
  geom_smooth(method=lm,colour="black",linewidth=0.5) +
  theme(axis.title.y=element_blank(),legend.position = "bottom",legend.title=element_blank()) +
  scale_x_continuous(name="Year",breaks = seq(min(seme_data$sem), max(seme_data$sem), by = 5))


# figure 2 - qa for donut graphs

donut_ecofin <- read_xlsx("./Donut ECOFIN.xlsx",col_names=TRUE,sheet=1)
donut_ecofin <- donut_ecofin[-1,]

donut_gaerc <- read_xlsx("./Donut GAERC.xlsx",col_names=FALSE,sheet=1)
colnames(donut_gaerc) <- colnames(donut_ecofin)

donut_ecofin <- donut_ecofin[donut_ecofin$mentions>4,]
donut_gaerc <- donut_gaerc[donut_gaerc$mentions>4,]

donut_ecofin$fraction <- donut_ecofin$mentions / sum(donut_ecofin$mentions)
donut_ecofin$ymax <- cumsum(donut_ecofin$mentions)
donut_ecofin$ymin <- c(0, head(donut_ecofin$ymax, n = -1))
donut_ecofin$labelPosition <- (donut_ecofin$ymax + donut_ecofin$ymin) / 2
donut_ecofin$label <- paste0(donut_ecofin$Topic,": ", donut_ecofin$mentions)

ggplot(donut_ecofin, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Topic)) +
  geom_rect() +
  geom_text( x=4.6, aes(y=labelPosition, label=label), size=2.7) +
  scale_fill_grey(start=0,end=0.8) +
  coord_polar(theta="y",start=3*pi/2) +
  xlim(c(2, 5)) +
  theme_void() +
  theme(legend.position = "none")

donut_gaerc$fraction <- donut_gaerc$mentions / sum(donut_gaerc$mentions)
donut_gaerc$ymax <- cumsum(donut_gaerc$mentions)
donut_gaerc$ymin <- c(0, head(donut_gaerc$ymax, n = -1))
donut_gaerc$labelPosition <- (donut_gaerc$ymax + donut_gaerc$ymin) / 2
donut_gaerc$label <- paste0(donut_gaerc$Topic,": ", donut_gaerc$mentions)

ggplot(donut_gaerc, aes(ymax=ymax, ymin=ymin, xmax=4, xmin=3, fill=Topic)) +
  geom_rect() +
  geom_text( x=4.5, aes(y=labelPosition, label=label), size=2.5) +
  scale_fill_grey(start=0,end=0.8) +
  coord_polar(theta="y",start=3*pi/2) +
  xlim(c(2, 5)) +
  theme_void() +
  theme(legend.position = "none")


# save results down for modelling-only script
write_xlsx(sessi_data,"./dataset_session.xlsx")
write_xlsx(seme_data,"./dataset_semester.xlsx")

